In [1]:
import numpy as np
import pandas as pd
In [2]:
# Three sample rows of four numeric values each, kept as separate 1-D arrays.
famille_pandas = [
    np.array(valeurs)
    for valeurs in (
        [50, 2.5, 10, 40],
        [100, 5, 20, 80],
        [110, 6, 22, 80],
    )
]
In [3]:
# Stack the list of equal-length 1-D arrays into a single 2-D array (one row per list entry).
famille_pandas_numpy = np.array(famille_pandas)
In [4]:
# Display the stacked array: 3 rows by 4 columns, upcast to float.
famille_pandas_numpy
Out[4]:
array([[ 50. ,   2.5,  10. ,  40. ],
       [100. ,   5. ,  20. ,  80. ],
       [110. ,   6. ,  22. ,  80. ]])
In [31]:
# All rows, column 0 only: the first value of each row.
famille_pandas_numpy[:, 0]
Out[31]:
array([ 50., 100., 110.])
In [32]:
# A bare slice selects every row, so the contents equal the whole array.
famille_pandas_numpy[:]
Out[32]:
array([[ 50. ,   2.5,  10. ,  40. ],
       [100. ,   5. ,  20. ,  80. ],
       [110. ,   6. ,  22. ,  80. ]])
In [33]:
# Row slice: the stop (4) is past the 3 available rows, so all rows are returned
# without raising an error.
famille_pandas_numpy[:4]
Out[33]:
array([[ 50. ,   2.5,  10. ,  40. ],
       [100. ,   5. ,  20. ,  80. ],
       [110. ,   6. ,  22. ,  80. ]])
In [ ]:
 
In [49]:
# Select the entry labelled 'california' by label.
# NOTE(review): `data` is not defined in any cell shown here; presumably it is
# created in cells that are not part of this excerpt. Confirm it exists before
# relying on a fresh "Restart & Run All".
data.loc['california']
Out[49]:
area      423967.0
pop     38332521.0
Name: california, dtype: float64

Concaténation ou union de données¶

Définir une fonction qui va générer automatiquement des DataFrame¶
In [50]:
def make_df(cols, ind):
    """Quickly build a small demo DataFrame of labelled strings.

    Each cell holds the column label followed by the row label, e.g.
    ``make_df('AB', [1, 2])`` yields the cells ``A1, A2`` and ``B1, B2``.

    Parameters
    ----------
    cols : iterable
        Column labels; each one is converted with ``str`` to build the cells.
    ind : iterable
        Row labels, used both as the index and as the suffix of every cell.

    Returns
    -------
    pandas.DataFrame
        One column per entry of ``cols`` and one row per entry of ``ind``.
    """
    # A one-shot iterator (generator, ``iter(...)``) would be exhausted by the
    # first column, leaving nothing for the remaining columns or the index.
    # Materialise it once; re-iterable inputs such as ``range`` or a list are
    # passed through untouched.
    if hasattr(ind, "__next__"):
        ind = list(ind)
    data = {c: [str(c) + str(i) for i in ind] for c in cols}
    return pd.DataFrame(data, index=ind)
Exemple 1¶
In [51]:
# A string is iterated character by character, giving columns A, B, C; rows are labelled 0 to 2.
make_df('ABC', range(3))
Out[51]:
A B C
0 A0 B0 C0
1 A1 B1 C1
2 A2 B2 C2
Concaténation avec "concat"¶
In [52]:
# First frame to concatenate: columns A and B, rows labelled 1 and 2.
# NOTE(review): the name df1 is reassigned to unrelated data in the join section below.
df1 = make_df('AB', [1, 2])
In [53]:
# Second frame with the same columns, rows labelled 3 and 4.
# NOTE(review): the name df2 is reassigned to unrelated data in the join section below.
df2 = make_df('AB', [3, 4])
In [55]:
# Stack the two frames vertically; the original row labels (1 to 4) are kept.
pd.concat([df1, df2])
Out[55]:
A B
1 A1 B1
2 A2 B2
3 A3 B3
4 A4 B4
La jointure¶
In [62]:
# Employee names with their department (df1) and with their start date (df2).
# NOTE(review): 'Engenineering' is spelled differently from the 'Engineering'
# key used in df5 further down, so those rows cannot match in a merge on department.
df1 = pd.DataFrame(
    [
        ('Bob', 'Accounting'),
        ('Jake', 'Engenineering'),
        ('Lisa', 'Engenineering'),
        ('Sue', 'HR'),
    ],
    columns=['employee', 'department'],
)
df2 = pd.DataFrame(
    [('Lisa', 2004), ('Bob', 2008), ('Jake', 2012), ('Sue', 2014)],
    columns=['employee', 'date'],
)
In [63]:
# No `on=` is given, so the merge joins on the column name both frames share ('employee').
df3 = pd.merge(df1, df2)
In [64]:
# Display the one-to-one join: one row per employee with department and date.
df3
Out[64]:
employee department date
0 Bob Accounting 2008
1 Jake Engenineering 2012
2 Lisa Engenineering 2004
3 Sue HR 2014
Un autre tableau avec le Manager¶
In [65]:
# Lookup table with one supervisor per department.
df4 = pd.DataFrame(
    [
        ('Accounting', 'carly'),
        ('Engenineering', 'Guido'),
        ('HR', 'Steve'),
    ],
    columns=['department', 'supervisor'],
)
Jointure plusieurs à un¶
In [66]:
# Join on the shared 'department' column: a department's supervisor is repeated
# for every employee belonging to that department.
pd.merge(df3, df4)
Out[66]:
employee department date supervisor
0 Bob Accounting 2008 carly
1 Jake Engenineering 2012 Guido
2 Lisa Engenineering 2004 Guido
3 Sue HR 2014 Steve
Jointure plusieurs à plusieurs¶
In [71]:
# Skills per department; every department appears on two rows.
# NOTE(review): 'Engineering' here does not match the 'Engenineering' spelling
# used as the department key in df1, so these rows find no partner when merged with it.
df5 = pd.DataFrame(
    [
        ('Accounting', 'math'),
        ('Accounting', 'spreadsheets'),
        ('Engineering', 'coding'),
        ('Engineering', 'linux'),
        ('HR', 'spreadsheets'),
        ('HR', 'organization'),
    ],
    columns=['department', 'competence'],
)
In [72]:
# Many-to-many join on 'department': each employee is paired with every skill row
# of their department.
# NOTE(review): df1 spells the department 'Engenineering' while df5 uses
# 'Engineering', so only the Accounting and HR rows match and Jake and Lisa are
# silently dropped from the result. Align the spelling in df1, df4 and df5
# together to obtain the full many-to-many output.
pd.merge(df1, df5)
Out[72]:
employee department competence
0 Bob Accounting math
1 Bob Accounting spreadsheets
2 Sue HR spreadsheets
3 Sue HR organization
In [ ]: